Datasets

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(datasets)
# Number of rows (observations) in the mpg data set.
NROW(mpg)
## [1] 234
# Number of columns (attributes) in the mpg data set.
NCOL(mpg)
## [1] 11

Gráfico de Linhas

library(plotly)

# Three synthetic series of 100 normally distributed samples each, centred
# on 5, 0 and -5, plotted against a shared index 1..100.
trace_0 <- rnorm(100, mean = 5)
trace_1 <- rnorm(100, mean = 0)
trace_2 <- rnorm(100, mean = -5)
x <- seq_len(100)

data <- data.frame(x, trace_0, trace_1, trace_2)

# The base figure only carries the shared x mapping. Each trace states its
# type explicitly so plotly does not have to guess it (which printed a
# "No trace type specified" message once per trace).
fig <- plot_ly(data, x = ~x)
fig <- fig %>%
  add_trace(y = ~trace_0, name = "Padrão 1", type = "scatter", mode = "lines")
fig <- fig %>%
  add_trace(
    y = ~trace_1, name = "Padrão 2", type = "scatter", mode = "lines+markers"
  )
fig <- fig %>%
  add_trace(y = ~trace_2, name = "Padrão 3", type = "scatter", mode = "markers")

fig

Gráfico de Barras

Segue um gráfico de barras

# Bar chart over the mpg data: car model on the x axis, `year` as bar height.
fig <- mpg %>%
  plot_ly(x = ~model, y = ~year, type = "bar")

fig

Filtrando instâncias

# Compact overview of mpg: one line per column with its type and first values.
utils::str(mpg)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...

Contando os valores do atributo “manufacturer” e pegando esses valores:

# Number of cars per manufacturer.
counts <- table(mpg$manufacturer)

# Take the labels from the table itself so every label is guaranteed to line
# up with its count: table() orders its entries by sorted value, whereas
# unique() returns values in order of first appearance, so the two only agree
# when the column happens to be sorted already.
values <- names(counts)

Nosso primeiro histograma:

# Bar chart of the number of cars (`counts`) per manufacturer (`values`).
# NOTE(review): passing the label vector as `name` appears intended to give
# each manufacturer its own legend entry - confirm against the rendered chart.
fig <- plot_ly(x = values, y = counts, type = "bar", name = values)
# Title the axes after what they actually hold: manufacturers on x and the
# number of cars on y (they were previously titled as model and year).
fig <- fig %>%
  layout(
    xaxis = list(title = "Fabricante"),
    yaxis = list(title = "Quantidade")
  )
fig

Podemos testar também o gráfico de barras empilhado:

counts <- table(mpg$manufacturer)

# A stacked chart needs one segment per sub-category, so cross-tabulate
# manufacturer x vehicle class into long form (columns: manufacturer, class,
# Freq). A separate variable keeps `counts` as the plain per-manufacturer table.
class_counts <- as.data.frame(
  table(manufacturer = mpg$manufacturer, class = mpg$class)
)

# One bar trace per class; barmode = "stack" piles the class segments on top
# of each other within each manufacturer's bar.
fig <- plot_ly(
  data = class_counts,
  x = ~manufacturer,
  y = ~Freq,
  color = ~class,
  type = "bar"
)
fig <- fig %>% layout(yaxis = list(title = "Count"), barmode = "stack")

fig

Vamos comparar agora apenas os carros das fabricantes Toyota e Volkswagen

# Keep only the rows of mpg that belong to each of the two manufacturers.
toyota <- mpg[mpg[["manufacturer"]] == "toyota", ]
volks <- mpg[mpg[["manufacturer"]] == "volkswagen", ]

Como fica a Toyota:

# Auto-print the Toyota subset to inspect its rows.
toyota

Como fica a Volkswagen:

# Auto-print the Volkswagen subset to inspect its rows.
volks

Gráfico de Pizza

# Pie chart of the Volkswagen cars by model year. Only `labels` is supplied:
# pie traces have no `x` attribute (passing one only triggers a warning), and
# per the plotly pie reference, when `values` is omitted each sector is sized
# by the number of rows carrying that label.
fig <- plot_ly(data = volks, labels = ~year, type = "pie")

fig

Gráfico de Pizza

# Pie chart of the Toyota cars by vehicle class. Only `labels` is supplied:
# pie traces have no `x` attribute (passing one only triggers a warning), and
# per the plotly pie reference, when `values` is omitted each sector is sized
# by the number of rows carrying that label.
fig <- plot_ly(data = toyota, labels = ~class, type = "pie")

fig

Box Plot

# Box plots comparing the `displ` column (engine displacement in the mpg data
# set) of the two manufacturers. The traces are named after the plotted
# variable; they were previously labelled as prices, which mpg does not have.
# The added trace sets no type of its own and inherits "box" from plot_ly().
fig <- plot_ly(y = volks$displ, type = "box", name = "Displacement Volkswagen")
fig <- fig %>% add_trace(y = toyota$displ, name = "Displacement Toyota")


fig
# Five-number summary (plus mean) to read alongside the Volkswagen box.
summary(volks$displ)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.800   2.000   2.000   2.256   2.500   3.600
# Same comparison, with boxpoints = "all" so every observation is drawn
# next to its box.
fig <- plot_ly(
  y = volks$displ,
  type = "box",
  boxpoints = "all",
  name = "Displacement Volkswagen"
)
fig <- fig %>% add_trace(y = toyota$displ, name = "Displacement Toyota")


fig

Gráfico de Dispersão

# Scatter plot of petal length against sepal length for the iris data.
# Type and mode are stated explicitly; they are the values plotly would
# otherwise infer while printing "No trace type / No scatter mode" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  type = "scatter",
  mode = "markers"
)

fig
library(plotly)

# Fixed colour per species: names are the Species levels, values the colours.
pal <- c("red", "blue", "green")
pal <- setNames(pal, c("virginica", "setosa", "versicolor"))

# Same scatter plot as before, coloured by species. Type and mode are stated
# explicitly; they are the values plotly would otherwise infer while printing
# "No trace type / No scatter mode" messages.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  color = ~Species,
  colors = pal,
  type = "scatter",
  mode = "markers"
)

fig

Matrizes de gráficos de dispersão

# Settings shared by the secondary axes (x2..x4 and y2..y4) of the matrix.
axis <- list(
  showline = FALSE,
  zeroline = FALSE,
  gridcolor = "#ffff",
  ticklen = 4,
  titlefont = list(size = 13)
)

# Scatter-plot matrix ("splom") of the four iris measurements, coloured by
# species, built as one pipeline: trace -> hide diagonal panels -> layout.
fig <- iris %>%
  plot_ly() %>%
  add_trace(
    type = "splom",
    dimensions = list(
      list(label = "sepal length", values = ~Sepal.Length),
      list(label = "sepal width", values = ~Sepal.Width),
      list(label = "petal length", values = ~Petal.Length),
      list(label = "petal width", values = ~Petal.Width)
    ),
    color = ~Species,
    colors = c("#636EFA", "#EF553B", "#00CC96"),
    marker = list(
      size = 7,
      line = list(width = 1, color = "rgb(230,230,230)")
    )
  ) %>%
  style(diagonal = list(visible = FALSE)) %>%
  layout(
    hovermode = "closest",
    dragmode = "select",
    plot_bgcolor = "rgba(240,240,240, 0.95)",
    # The first x/y axes are configured inline (no title font); the remaining
    # ones reuse the shared `axis` settings.
    xaxis = list(
      domain = NULL, showline = FALSE, zeroline = FALSE,
      gridcolor = "#ffff", ticklen = 4
    ),
    yaxis = list(
      domain = NULL, showline = FALSE, zeroline = FALSE,
      gridcolor = "#ffff", ticklen = 4
    ),
    xaxis2 = axis,
    xaxis3 = axis,
    xaxis4 = axis,
    yaxis2 = axis,
    yaxis3 = axis,
    yaxis4 = axis
  )

fig

Podemos utilizar a análise das componentes principais (PCA) para alterar a representação dos dados originais:

Um bom tutorial sobre PCA pode ser encontrado neste link

Variância acumulada das componentes principais:

# PCA over the first four (numeric) iris columns; summary() reports the
# standard deviation and the individual/cumulative variance share per component.
pca <- prcomp(iris[, seq_len(4)])
summary(pca)
## Importance of components:
##                           PC1     PC2    PC3     PC4
## Standard deviation     2.0563 0.49262 0.2797 0.15439
## Proportion of Variance 0.9246 0.05307 0.0171 0.00521
## Cumulative Proportion  0.9246 0.97769 0.9948 1.00000

Modificando a base de dados:

library(plotly)
library(stats)
data(iris)
# Features only: every iris column except the Species label.
X <- subset(iris, select = -c(Species))
# Keep just the first two principal components.
prin_comp <- prcomp(X, rank. = 2)
components <- data.frame(prin_comp[["x"]])
# Attach the label under a proper column name (cbind() with a bare
# `iris$Species` argument would name the column literally "iris$Species").
components$Species <- iris$Species
# The sign of a principal component is arbitrary; flipping PC2 only mirrors
# the plot vertically.
components$PC2 <- -components$PC2

# Scatter plot of the two components, coloured by species. Axis and legend
# titles name what is plotted instead of the placeholders "0", "1" and "color".
fig <- plot_ly(
  components,
  x = ~PC1,
  y = ~PC2,
  color = ~Species,
  colors = c("#636EFA", "#EF553B", "#00CC96"),
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    legend = list(title = list(text = "Species")),
    plot_bgcolor = "#e5ecf6",
    xaxis = list(
      title = "PC1",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      gridcolor = "#ffff"
    ),
    yaxis = list(
      title = "PC2",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      gridcolor = "#ffff"
    )
  )
fig
# Install tsne only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("tsne", quietly = TRUE)) {
  install.packages("tsne")
}
## Installing package into '/home/viniciusrpb/R/x86_64-pc-linux-gnu-library/3.6'
## (as 'lib' is unspecified)

Agora vamos projetar os dados em duas dimensões com o t-SNE:

library(tsne)

# Feature table: every iris column except the Species label.
features <- iris[, names(iris) != "Species"]

# Fix the RNG seed so the t-SNE run below is reproducible.
set.seed(0)
tsne <- tsne(features, initial_dims = 2)
## sigma summary: Min. : 0.376979658833158 |1st Qu. : 0.45299119244845 |Median : 0.509480199794486 |Mean : 0.520650714341092 |3rd Qu. : 0.579571467464058 |Max. : 0.758492715638686 |
## Epoch: Iteration #100 error is: 11.2163844747471
## Epoch: Iteration #200 error is: 0.0778396135053768
## Epoch: Iteration #300 error is: 0.0763256129519401
## Epoch: Iteration #400 error is: 0.0754813401106922
## Epoch: Iteration #500 error is: 0.074925896727087
## Epoch: Iteration #600 error is: 0.0746078353723582
## Epoch: Iteration #700 error is: 0.0743992115035749
## Epoch: Iteration #800 error is: 0.0742601821249679
## Epoch: Iteration #900 error is: 0.0741669765260436
## Epoch: Iteration #1000 error is: 0.0741048938206219
# Turn the t-SNE result into a data frame (its coordinate columns are
# referenced as X1 and X2 below) and attach the species label under a
# proper column name.
tsne <- data.frame(tsne)
pdb <- cbind(tsne, Species = iris$Species)
# One scatter trace per species. The mode is stated explicitly; "markers" is
# the value plotly would otherwise pick while printing a
# "No scatter mode specifed" message.
fig <- plot_ly(
  data = pdb,
  x = ~X1,
  y = ~X2,
  type = "scatter",
  mode = "markers",
  split = ~Species
)

fig

não-supervisionado